package com.spider.util;

import java.util.Date;
import java.util.Iterator;

import jodd.http.HttpRequest;
import jodd.http.HttpResponse;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import com.spider.model.Lagou;
import com.spider.model.LagouInfo;

/**
 * Helpers that download the Lagou Java job listing, scrape each listed
 * position together with its detail page, and persist the result through the
 * {@link Lagou} and {@link LagouInfo} models.
 *
 * <p>The listing markup is read positionally (by child index), so any change
 * in the page layout surfaces as an exception from the parsing methods.
 */
public class LagouUtil {

	/** Address of the listing page; also the base used to resolve links found on it. */
	public static final String indexUrl = "http://www.lagou.com/jobs/list_Java";

	/**
	 * Downloads one page of the job listing.
	 *
	 * @param pageNo page number, sent as the {@code pn} query parameter
	 * @return the response body as text
	 */
	public static String fetchIndexHtml(int pageNo) {
		HttpResponse response = HttpRequest.get(indexUrl)
				.query("kd", "Java")
				.query("spc", "1")
				.query("city", "北京")
				.query("pn", pageNo)
				.send();
		return response.bodyText();
	}

	/**
	 * Downloads a job detail page.
	 *
	 * @param url address of the detail page
	 * @return the response body as text
	 */
	public static String fetchDetailHtml(String url) {
		HttpResponse response = HttpRequest.get(url).send();
		return response.bodyText();
	}

	/**
	 * Parses a listing page and, for every {@code .hot_pos>li} entry, fetches
	 * the linked detail page and saves one {@link Lagou} row plus one
	 * {@link LagouInfo} row that carries the same {@code id}.
	 *
	 * <p>An exception raised while handling one entry propagates to the
	 * caller; entries saved before it stay saved.
	 *
	 * @param html markup of a listing page
	 * @return the number of {@code .hot_pos>li} entries found on the page
	 * @throws IndexOutOfBoundsException if an entry lacks an expected element
	 */
	public static int parseIndex(String html) {
		// Parsing with a base URI lets relative and protocol-relative links be resolved.
		Document doc = Jsoup.parse(html, indexUrl);
		Elements items = doc.select(".hot_pos>li");
		for (Element item : items) {
			Lagou job = new Lagou();

			// First div: title link, salary, experience, degree, point, time.
			Element left = firstMatch(item, ".hot_pos_l");
			Element link = firstMatch(left.child(0), "a");
			LagouInfo detail = parseDetail(fetchDetailHtml(resolveHref(link)));
			fillLeftColumn(job, left, link);

			// Second div: company, area, optional creater, stage, size, tags.
			fillRightColumn(job, firstMatch(item, ".hot_pos_r"));

			job.set("create_time", new Date());
			job.save();
			// NOTE(review): presumably save() populates "id" with an integer key;
			// confirm against the model layer.
			int id = job.get("id");
			detail.set("id", id);
			detail.save();
		}
		return items.size();
	}

	/**
	 * Extracts the description and address from a job detail page.
	 *
	 * @param html markup of a detail page
	 * @return an unsaved {@link LagouInfo} with {@code text} and {@code address} set
	 * @throws IndexOutOfBoundsException if {@code dd.job_bt} or {@code #smallmap} is absent
	 * @throws NullPointerException if {@code #smallmap} has no preceding sibling element
	 */
	public static LagouInfo parseDetail(String html) {
		Document doc = Jsoup.parse(html);
		String text = firstMatch(doc, "dd.job_bt").text();

		// The address is read from the element directly before the map container.
		Element beforeMap = firstMatch(doc, "#smallmap").previousElementSibling();
		if (beforeMap == null) {
			throw new NullPointerException("No element precedes '#smallmap'; cannot read the address");
		}
		String address = beforeMap.text();

		LagouInfo info = new LagouInfo();
		info.set("text", text);
		info.set("address", address);
		return info;
	}

	/** Manual smoke run: downloads one fixed detail page and parses it. */
	public static void main(String[] args) {
		String html = fetchDetailHtml("http://www.lagou.com/jobs/472354.html?source=search");
		parseDetail(html);
	}

	/** Copies the fields of the first div of a listing entry into {@code job}. */
	private static void fillLeftColumn(Lagou job, Element left, Element link) {
		job.set("title", link.text());
		job.set("salary", textWithoutEm(left.child(1)));
		job.set("experience", textWithoutEm(left.child(2)));
		job.set("degree", textWithoutEm(left.child(3)));
		// Children 4 and 6 are not read.
		job.set("point", textWithoutEm(left.child(5)));
		job.set("time", textWithoutEm(left.child(7)));
	}

	/** Copies the fields of the second div of a listing entry into {@code job}. */
	private static void fillRightColumn(Lagou job, Element right) {
		job.set("company", firstMatch(right.child(1), "a").text());
		job.set("area", textWithoutEm(right.child(2)));

		// Child 3 is a span holding the creater on some entries; when it is,
		// the fields that follow sit one position further along.
		int next = 4;
		Element optional = right.child(3);
		if ("span".equals(optional.tagName())) {
			job.set("creater", textWithoutEm(optional));
			next++;
		}

		job.set("stage", textWithoutEm(right.child(next)));
		job.set("size", textWithoutEm(right.child(next + 1)));

		Elements tags = right.child(next + 2).children();
		if (!tags.isEmpty()) {
			StringBuilder joined = new StringBuilder();
			for (Element tag : tags) {
				joined.append(tag.text()).append("、");
			}
			// Drop the separator that trails the last tag.
			joined.setLength(joined.length() - 1);
			job.set("point2", joined.toString());
		}
	}

	/** Removes every {@code em} inside {@code element} and returns the text that remains. */
	private static String textWithoutEm(Element element) {
		element.select("em").remove();
		return element.text();
	}

	/**
	 * Returns the first element matched by {@code cssQuery} under {@code root},
	 * failing with a message that names the selector when nothing matches.
	 */
	private static Element firstMatch(Element root, String cssQuery) {
		Elements found = root.select(cssQuery);
		if (found.isEmpty()) {
			throw new IndexOutOfBoundsException("No element matches selector '" + cssQuery + "'");
		}
		return found.get(0);
	}

	/**
	 * Returns the link target as an absolute URL, falling back to the raw
	 * {@code href} value when it cannot be resolved.
	 */
	private static String resolveHref(Element link) {
		String absolute = link.absUrl("href");
		return absolute.isEmpty() ? link.attr("href") : absolute;
	}

}
